/**
 * http://www.koorz.com
 * Copyright (c) 2012 shanghai meiku information technology co,.ltd
 */
package com.koorz.modules.search.meilishuo;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.jfinal.log.Logger;
import com.koorz.modules.catalog.Category;
import com.koorz.modules.catalog.CategoryLabel;
import com.koorz.modules.catalog.Label;
import com.koorz.modules.catalog.LabelGroup;
import com.koorz.modules.catalog.LabelGroupLabel;
import com.koorz.modules.search.AbstractParser;
import com.koorz.modules.search.UrlWrapper;
import com.koorz.utils.DateUtil;
import com.koorz.utils.IdManage;

/**
 * Parser that seeds the crawl list for the meilishuo.com catalog pages.
 *
 * <p>{@link #init()} downloads the top-level catalog pages, stores the label groups and labels
 * found on them through the catalog models, and queues one goods-listing URL per label.
 *
 * <p>Author: Yin Dongdong<br>
 * Created: 2013-6-2 18:20:54<br>
 * Version: 1.0
 */
public class MeilishuoParser extends AbstractParser {
	protected final Logger logger = Logger.getLogger(getClass());

	/** Browser user-agent string sent with every catalog page request. */
	private static final String USER_AGENT =
			"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.94 Safari/537.36";

	/** Holder class so the singleton is created on first call to {@link #getInstance()}. */
	private static class SingletonHolder {
		static final MeilishuoParser INSTANCE = new MeilishuoParser();
	}

	/**
	 * @return the single shared parser instance
	 */
	public static MeilishuoParser getInstance() {
		return SingletonHolder.INSTANCE;
	}

	private MeilishuoParser() {}

	/**
	 * Initializes the Meilishuo categories / label groups / labels / crawl URLs.
	 *
	 * <p>For each of the four catalog pages this looks up the local category by menu name,
	 * fetches the page, and for every {@code div.catacon h2} heading saves the label group,
	 * its labels and their associations when they do not exist yet. One goods-listing URL per
	 * label is appended to {@code navis}, and finally {@code setMaxCount} is called with the
	 * resulting size ({@code navis} and {@code setMaxCount} are not declared in this class;
	 * presumably inherited from {@code AbstractParser}).
	 *
	 * <p>An {@link IOException} while fetching one catalog page is logged and the remaining
	 * pages are still processed.
	 */
	public void init() {
		List<UrlWrapper> urls = new ArrayList<UrlWrapper>();
		urls.add(new UrlWrapper("http://www.meilishuo.com/guang/catalog/dress?cata_id=2000000000000","衣服","",""));
		urls.add(new UrlWrapper("http://www.meilishuo.com/guang/catalog/shoes?cata_id=6000000000000","鞋子","",""));
		urls.add(new UrlWrapper("http://www.meilishuo.com/guang/catalog/bag?cata_id=5000000000000","包包","",""));
		urls.add(new UrlWrapper("http://www.meilishuo.com/guang/catalog/access?cata_id=7000000000000","配饰","",""));

		Map<String, String> cookies = buildCookies();
		for (UrlWrapper url : urls) {
			// The try is per URL so that one unreachable page does not drop the other catalogs.
			try {
				importCatalog(url, cookies);
			} catch (IOException e) {
				logger.error("获取链接失败MeilishuoParser.init() url=" + url.getUrl(), e);
			}
		}
		setMaxCount(navis.size());
	}

	/**
	 * Builds the cookie set sent with every catalog request.
	 */
	private Map<String, String> buildCookies() {
		// NOTE(review): these look like values captured from a browser session and are
		// hard-coded; they presumably expire - consider moving them to configuration.
		Map<String, String> cookies = new HashMap<String, String>();
		cookies.put("SEASHELL", "fMqQCVGqB4yVwzrjB+EsAg==");
		cookies.put("MEILISHUO_GLOBAL_KEY", "1e9b0fab8400a5d30130601223908456");
		cookies.put("CHANNEL_FROM", "0");
		cookies.put("santorini_mm", "a4a695aa3eb9c41c9989d3fc440679ef");
		cookies.put("PHPSESSID", "5ffb2hetrv3pvq2gciup7r0j93");
		return cookies;
	}

	/**
	 * Fetches one catalog page and imports every label group and label listed on it.
	 *
	 * @param url     catalog page to fetch; its menu name selects the local category
	 * @param cookies cookies to send with the request
	 * @throws IOException if the page cannot be downloaded
	 */
	private void importCatalog(UrlWrapper url, Map<String, String> cookies) throws IOException {
		Category category = Category.getCategory("name", url.getMenu());
		if (category == null) {
			// Without a category there is nothing to attach groups/labels to; skip the fetch.
			logger.error("MeilishuoParser.init(): no category found for menu " + url.getMenu());
			return;
		}
		String categoryId = category.getStr("id");

		Document doc = Jsoup.connect(url.getUrl())
				.referrer(url.getUrl())
				.cookies(cookies)
				.userAgent(USER_AGENT)
				.get();

		for (Element heading : doc.select("div.catacon h2")) {
			// Check whether the label group exists; save it if it does not.
			String labelGroupName = heading.text();
			LabelGroup labelGroup = findOrCreateLabelGroup(categoryId, labelGroupName);
			String labelGroupId = labelGroup.getStr("id");

			// The label links live in the element right after the heading.
			Element linkContainer = heading.nextElementSibling();
			if (linkContainer == null) {
				logger.warn("MeilishuoParser.init(): no label list after heading " + labelGroupName);
				continue;
			}

			List<String> labelIds = new ArrayList<String>();
			for (Element link : linkContainer.select("a")) {
				// Check the label; save it if it does not exist.
				String labelName = link.text();
				Label label = findOrCreateLabel(labelName);
				String labelId = label.getStr("id");
				labelIds.add(labelId);

				// Example source link:
				//   ?cata_id=6000000000000&frame=0&word=34485&price=all&page=0&section=hot
				// Example resulting URL:
				//   http://www.meilishuo.com/aj/getGoods/catalog?frame=0&page=0&view=1&word=34466&cata_id=6000000000000&section=hot&price=all
				String href = link.attr("href");
				String word = queryParam(href, "word");
				String cataId = queryParam(href, "cata_id");
				if (word == null || cataId == null) {
					// Do not queue a URL containing the text "null"; the label itself is kept.
					logger.warn("MeilishuoParser.init(): link without word/cata_id skipped: " + href);
					continue;
				}
				String goodsUrl = "http://www.meilishuo.com/aj/getGoods/catalog?frame=0&page=0&view=1&word="
						+ word + "&cata_id=" + cataId + "&section=hot&price=all";
				navis.add(new UrlWrapper(goodsUrl,
						url.getMenu(), labelGroupName, labelName, categoryId, labelGroupId, labelId));
			}

			// Save the labels to the category.
			CategoryLabel.saveLabelsToCategory(categoryId, labelIds);

			// Save the labels to the label group.
			linkLabelsToGroup(labelGroupId, labelIds);
		}
	}

	/**
	 * Returns the label group with the given name in the given category, creating and saving
	 * it first when no such row exists.
	 */
	private LabelGroup findOrCreateLabelGroup(String categoryId, String name) {
		LabelGroup labelGroup = LabelGroup.dao.findFirst(
				"select * from label_group where category_id=? and name=?", categoryId, name);
		if (labelGroup == null) {
			labelGroup = new LabelGroup();
			labelGroup.set("id", IdManage.nextId(IdManage.LABEL_GROUP));
			labelGroup.set("name", name);
			labelGroup.set("create_time", DateUtil.getNowDateTime());
			labelGroup.set("category_id", categoryId);
			labelGroup.set("is_show", 1);
			labelGroup.set("hot", 0);
			labelGroup.set("sequence", 10);
			labelGroup.save();
		}
		return labelGroup;
	}

	/**
	 * Returns the label with the given name, creating it through {@code Label.saveLabel}
	 * when {@code Label.getLabel} finds none.
	 */
	private Label findOrCreateLabel(String name) {
		Label label = Label.getLabel("name", name);
		if (label == null) {
			label = new Label();
			label.set("name", name);
			label.set("hot", 0);
			label.set("check_state", Label.LabelState.SHOW.ordinal());
			Label.saveLabel(label);
		}
		return label;
	}

	/**
	 * Inserts a label_group_label row for every label id that is not yet linked to the group.
	 */
	private void linkLabelsToGroup(String labelGroupId, List<String> labelIds) {
		for (String labelId : labelIds) {
			LabelGroupLabel link = LabelGroupLabel.dao.findFirst(
					"select * from label_group_label where label_group_id=? and label_id=?", labelGroupId, labelId);
			if (link != null) {
				continue;
			}
			link = new LabelGroupLabel();
			link.set("id", IdManage.nextId(IdManage.LABEL_GROUP_LABEL));
			link.set("label_group_id", labelGroupId);
			link.set("label_id", labelId);
			link.set("hot", 0);
			link.set("create_time", DateUtil.getNowDateTime());
			link.save();
		}
	}

	/**
	 * Extracts the raw value of a query parameter from a link.
	 *
	 * <p>Unlike {@code StringUtils.substringBetween(href, name + "=", "&")} this also works
	 * when the parameter is the last one in the query string, and it only matches the name at
	 * a parameter boundary (start of string, after {@code ?} or after {@code &}).
	 *
	 * @param href link text to search, may be {@code null}
	 * @param name parameter name without the trailing {@code =}
	 * @return the value up to the next {@code &} or the end of the string, or {@code null}
	 *         when the parameter is not present
	 */
	private static String queryParam(String href, String name) {
		if (href == null) {
			return null;
		}
		String key = name + "=";
		int from = 0;
		while (true) {
			int at = href.indexOf(key, from);
			if (at < 0) {
				return null;
			}
			boolean atBoundary = at == 0 || href.charAt(at - 1) == '?' || href.charAt(at - 1) == '&';
			if (atBoundary) {
				int start = at + key.length();
				int end = href.indexOf('&', start);
				return end < 0 ? href.substring(start) : href.substring(start, end);
			}
			from = at + 1;
		}
	}
}
